-
-
Save eternal-flame-AD/7c9448fcc8627247240aa3e61e61ae30 to your computer and use it in GitHub Desktop.
Misskey SPAM statistics on database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BEGIN TRANSACTION READ WRITE; | |
-- write helper functions to yume schema | |
CREATE schema IF NOT EXISTS yume; | |
-- is default ID | |
CREATE OR REPLACE FUNCTION yume.is_default_name(text) RETURNS boolean AS $$ | |
SELECT $1 ~ '^[0-9a-z]{10}$' $$ LANGUAGE SQL IMMUTABLE LEAKPROOF; | |
-- base36 to int | |
CREATE OR REPLACE FUNCTION yume.base36_to_int(text) RETURNS bigint AS $$ | |
DECLARE result bigint := 0; | |
c char; | |
cd int; | |
len int; | |
BEGIN | |
len := length($1); | |
FOR i IN 1..len LOOP | |
c := substring($1 FROM i FOR 1); | |
cd := ascii(c); | |
IF cd >= ASCII('0') AND cd <= ASCII('9') | |
THEN result := result * 36 + cd - ASCII('0'); | |
ELSE | |
IF cd >= ASCII('a') AND cd <= ASCII('z') | |
THEN result := result * 36 + cd - ASCII('a') + 10; | |
ELSE RAISE EXCEPTION 'Invalid character in base36 string: %', c; | |
END IF; | |
END IF; | |
END LOOP; | |
RETURN result; | |
END; | |
$$ LANGUAGE plpgsql IMMUTABLE LEAKPROOF; | |
-- AIDX index to millis from 2000-01-01 | |
CREATE OR REPLACE FUNCTION yume.aidx_millis(text) RETURNS bigint AS $$ | |
SELECT yume.base36_to_int(SUBSTRING($1 FROM 1 FOR 8)); | |
$$ LANGUAGE SQL IMMUTABLE LEAKPROOF COST 0.5; | |
-- AIDX index to timestamp | |
CREATE OR REPLACE FUNCTION yume.aidx_time(text) RETURNS timestamp AS $$ | |
SELECT make_timestamptz(2000, 1, 1, 0, 0, 0, 'UTC') + | |
yume.aidx_millis($1) / 1000 * interval '1 second' + | |
yume.aidx_millis($1) % 1000 * interval '1 millisecond'; | |
$$ LANGUAGE SQL IMMUTABLE LEAKPROOF COST 1; | |
-- hostname last component | |
CREATE OR REPLACE FUNCTION yume.host_last_component(text) RETURNS text AS $$ | |
SELECT substring((select regexp_matches($1, '\.[a-z]{2,}$'))[1] FROM 2) LIMIT 1; | |
$$ LANGUAGE SQL IMMUTABLE LEAKPROOF; | |
-- TLDS with more than 1 component | |
CREATE TABLE IF NOT EXISTS yume.tld_overrides ( | |
tld text PRIMARY KEY NOT NULL | |
); | |
INSERT INTO yume.tld_overrides(tld) VALUES ('.ne.jp') ON CONFLICT DO NOTHING; | |
INSERT INTO yume.tld_overrides(tld) VALUES ('.co.jp') ON CONFLICT DO NOTHING; | |
INSERT INTO yume.tld_overrides(tld) VALUES ('.or.jp') ON CONFLICT DO NOTHING; | |
-- convert hostname to TLD | |
CREATE OR REPLACE FUNCTION yume.get_tld(text) RETURNS text AS $$ | |
WITH overridden AS ( | |
SELECT tld FROM yume.tld_overrides WHERE substring($1 FROM length($1) - length(tld) + 1) = tld | |
) | |
SELECT | |
CASE | |
WHEN EXISTS(SELECT * FROM overridden) THEN (SELECT tld FROM overridden) | |
ELSE '.' || yume.host_last_component($1) | |
END | |
END; | |
$$ LANGUAGE SQL STABLE LEAKPROOF; | |
COMMIT; | |
BEGIN TRANSACTION READ ONLY; | |
-- actually query, to save to csv wrap the query without the last semicolon in '\copy ( ... ) TO '/tmp/spam.csv' WITH (FORMAT CSV, HEADER); | |
WITH typed AS ( | |
SELECT | |
*, | |
(SELECT | |
CASE | |
WHEN | |
cw IS NULL AND | |
TEXT ~ '^(@[a-zA-Z0-9_]+@[a-zA-Z0-9_.]+\s*)+$' AND | |
array_length(note."attachedFileTypes", 1) = 1 | |
THEN 'qr' | |
WHEN | |
cw IS NULL AND | |
TEXT ~ 'サイトに核を飛ばすことです!' AND | |
json_array_length(note."mentionedRemoteUsers"::json) >= 5 AND | |
array_length(note."attachedFileTypes", 1) IS NULL | |
THEN 'lawyer' | |
ELSE NULL | |
END | |
) as ty | |
FROM note | |
) | |
SELECT | |
count(*), | |
ty, | |
"user".host as src_host, | |
yume.get_tld("user".host) as src_tld, | |
date_trunc('hour', yume.aidx_time(typed."id")) AS utc_hour, | |
json_array_length(typed."mentionedRemoteUsers"::json) AS mentioned | |
FROM typed | |
INNER JOIN "user" ON "user".id = typed."userId" | |
WHERE | |
ty IS NOT NULL AND | |
yume.is_default_name("user".username) AND | |
"user".name IS NULL | |
GROUP BY utc_hour, ty, mentioned, "user".host | |
ORDER BY count DESC; | |
ROLLBACK; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(cowplot) | |
library(colorspace) | |
spam <- read_csv("spam.csv") %>% | |
mutate(time = as_datetime(utc_hour)) %>% | |
group_by(src_host, ty, time) %>% | |
summarize(total_mention = sum(count * (mentioned + 1))) %>% | |
group_by(src_host) %>% | |
mutate(src_host_count = sum(total_mention)) %>% | |
ungroup() %>% | |
mutate(src_prop = src_host_count / sum(total_mention)) %>% | |
mutate(src_prop_disp = map_chr(src_prop * 100, ~sprintf("%.2f%%", .x))) %>% | |
mutate(src_host = str_glue("{src_host} ({src_host_count}, {src_prop_disp})")) %>% | |
mutate(src_host = fct_reorder(src_host, -src_host_count)) | |
spam %>% | |
ggplot(aes(x=time, y=total_mention, fill=src_host)) + | |
geom_col(position="dodge") + | |
scale_x_datetime(date_labels="%Y-%m-%d %H:%M", date_breaks="1 hour") + | |
scale_y_log10() + | |
scale_fill_discrete_qualitative(palette = "Set3") + | |
facet_wrap(~ty, ncol = 1, scale = "free_x") + | |
theme_cowplot(font_size = 18) + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + | |
guides(fill = guide_legend(ncol = 1)) + | |
labs( | |
title = "Spam I know on mi.yumechi.jp", | |
x = "Hour", | |
y = "n_post * (n_mention + 1)", | |
fill = "host" | |
) | |
ggsave("spam.png", width=18, height=14, dpi=300, bg="white") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
count | ty | src_host | src_tld | utc_hour | mentioned | |
---|---|---|---|---|---|---|
57 | lawyer | urweibo.com | .com | 2024-10-08 13:00:00 | 14 | |
50 | lawyer | urweibo.com | .com | 2024-10-08 13:00:00 | 13 | |
48 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 05:00:00 | 1 | |
46 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 04:00:00 | 1 | |
39 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 04:00:00 | 3 | |
35 | lawyer | urweibo.com | .com | 2024-10-08 13:00:00 | 15 | |
35 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 05:00:00 | 3 | |
32 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 05:00:00 | 2 | |
31 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 04:00:00 | 4 | |
29 | lawyer | misskey.favskey.online | .online | 2024-10-08 13:00:00 | 15 | |
27 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 04:00:00 | 2 | |
26 | lawyer | misskey.favskey.online | .online | 2024-10-08 13:00:00 | 14 | |
24 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 05:00:00 | 5 | |
24 | lawyer | urweibo.com | .com | 2024-10-08 13:00:00 | 12 | |
22 | qr | catcore.life | .life | 2024-11-04 04:00:00 | 2 | |
19 | qr | catcore.life | .life | 2024-11-04 04:00:00 | 1 | |
19 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 11 | |
18 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 04:00:00 | 5 | |
18 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 12 | |
18 | qr | catcore.life | .life | 2024-11-04 04:00:00 | 3 | |
17 | lawyer | ai.wiki | .wiki | 2024-10-08 13:00:00 | 10 | |
16 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 05:00:00 | 4 | |
14 | lawyer | ai.wiki | .wiki | 2024-10-08 13:00:00 | 9 | |
12 | lawyer | misskey.favskey.online | .online | 2024-10-08 13:00:00 | 13 | |
12 | lawyer | comingto.org | .org | 2024-10-08 13:00:00 | 13 | |
12 | qr | catcore.life | .life | 2024-11-04 04:00:00 | 4 | |
10 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 14 | |
10 | lawyer | catcore.life | .life | 2024-10-08 13:00:00 | 14 | |
10 | qr | catcore.life | .life | 2024-11-04 04:00:00 | 5 | |
9 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 03:00:00 | 5 | |
9 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 8 | |
8 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 10 | |
8 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 03:00:00 | 4 | |
8 | lawyer | mk.hoshisaki-h.com | .com | 2024-10-08 13:00:00 | 14 | |
7 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 13 | |
7 | qr | catcore.life | .life | 2024-11-04 03:00:00 | 5 | |
7 | lawyer | comingto.org | .org | 2024-10-08 13:00:00 | 15 | |
6 | lawyer | catcore.life | .life | 2024-10-08 13:00:00 | 12 | |
6 | lawyer | comingto.org | .org | 2024-10-08 13:00:00 | 14 | |
6 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 12 | |
6 | lawyer | misskey.favskey.online | .online | 2024-10-08 13:00:00 | 12 | |
6 | lawyer | urweibo.com | .com | 2024-10-08 14:00:00 | 12 | |
6 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 15 | |
5 | lawyer | ai.wiki | .wiki | 2024-10-08 13:00:00 | 8 | |
5 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 14 | |
5 | lawyer | catcore.life | .life | 2024-10-08 13:00:00 | 15 | |
4 | lawyer | n7.social | .social | 2024-10-08 14:00:00 | 9 | |
4 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 9 | |
4 | lawyer | urweibo.com | .com | 2024-10-08 13:00:00 | 11 | |
4 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 13 | |
4 | lawyer | catcore.life | .life | 2024-10-08 13:00:00 | 13 | |
4 | lawyer | mk.hoshisaki-h.com | .com | 2024-10-08 13:00:00 | 15 | |
4 | lawyer | ai.wiki | .wiki | 2024-10-08 14:00:00 | 9 | |
4 | lawyer | mkacg.com | .com | 2024-10-08 13:00:00 | 5 | |
4 | lawyer | n7.social | .social | 2024-10-08 14:00:00 | 11 | |
4 | lawyer | urweibo.com | .com | 2024-10-08 14:00:00 | 11 | |
4 | qr | catcore.life | .life | 2024-11-04 03:00:00 | 4 | |
3 | lawyer | mkacg.com | .com | 2024-10-08 13:00:00 | 7 | |
3 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 11 | |
3 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 04:00:00 | 0 | |
2 | lawyer | ai.wiki | .wiki | 2024-10-08 13:00:00 | 7 | |
2 | qr | catcore.life | .life | 2024-11-04 04:00:00 | 0 | |
2 | lawyer | ai.wiki | .wiki | 2024-10-08 14:00:00 | 10 | |
2 | lawyer | mk.hoshisaki-h.com | .com | 2024-10-08 13:00:00 | 12 | |
2 | lawyer | mk.hoshisaki-h.com | .com | 2024-10-08 13:00:00 | 13 | |
2 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 10 | |
2 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 7 | |
2 | qr | mk.hoshisaki-h.com | .com | 2024-11-04 05:00:00 | 0 | |
1 | qr | tickler.cc | .cc | 2024-11-05 06:00:00 | 1 | |
1 | lawyer | msk.ilnk.info | .info | 2024-10-08 13:00:00 | 12 | |
1 | lawyer | comingto.org | .org | 2024-10-08 13:00:00 | 12 | |
1 | lawyer | misskey.favskey.online | .online | 2024-10-08 13:00:00 | 11 | |
1 | qr | misskey.gg | .gg | 2024-11-04 04:00:00 | 1 | |
1 | lawyer | comingto.org | .org | 2024-10-08 13:00:00 | 11 | |
1 | lawyer | catcore.life | .life | 2024-10-08 13:00:00 | 11 | |
1 | lawyer | mkacg.com | .com | 2024-10-08 13:00:00 | 10 | |
1 | lawyer | comingto.org | .org | 2024-10-08 13:00:00 | 10 | |
1 | lawyer | catcore.life | .life | 2024-10-08 13:00:00 | 10 | |
1 | lawyer | mstdn.beer | .beer | 2024-10-08 13:00:00 | 8 | |
1 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 13:00:00 | 7 | |
1 | lawyer | msk.ilnk.info | .info | 2024-10-08 13:00:00 | 15 | |
1 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 15 | |
1 | lawyer | zmi.im | .im | 2024-10-08 13:00:00 | 14 | |
1 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 16 | |
1 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 19 | |
1 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 17 | |
1 | lawyer | n7.social | .social | 2024-10-08 14:00:00 | 7 | |
1 | qr | misskey.t1nyb0x.cloud | .cloud | 2024-11-04 05:00:00 | 4 | |
1 | lawyer | n7.social | .social | 2024-10-08 13:00:00 | 6 | |
1 | lawyer | n7.social | .social | 2024-10-08 14:00:00 | 10 | |
1 | lawyer | mkacg.com | .com | 2024-10-08 13:00:00 | 6 | |
1 | qr | misskey.shunrin.com | .com | 2024-11-04 11:00:00 | 0 | |
1 | lawyer | a.sukazyo.cc | .cc | 2024-10-08 14:00:00 | 12 | |
1 | lawyer | mk.hoshisaki-h.com | .com | 2024-10-08 14:00:00 | 12 | |
1 | lawyer | n7.social | .social | 2024-10-08 14:00:00 | 12 | |
1 | qr | catcore.life | .life | 2024-11-04 03:00:00 | 3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment