Skip to content

Instantly share code, notes, and snippets.

@wchargin
Last active September 28, 2021 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wchargin/e26cea8567660f00e49bf07d7eefafe4 to your computer and use it in GitHub Desktop.
Save wchargin/e26cea8567660f00e49bf07d7eefafe4 to your computer and use it in GitHub Desktop.
token rarity analysis
WITH project_id(project_id) AS (VALUES(163)),
token_bounds(min_id, max_id) AS (
SELECT
(1000000 * project_id) AS min_id,
(1000000 * (project_id + 1)) AS max_id
FROM project_id
),
project_tokens(token_id) AS (
SELECT token_id FROM tokens CROSS JOIN token_bounds
WHERE min_id <= token_id AND token_id < max_id
),
total_tokens(ct) AS (
SELECT COUNT(1) AS ct FROM project_tokens
),
trait_counts(name, ct) AS (
SELECT feature_name AS name, COUNT(1) AS ct
FROM project_tokens JOIN token_features USING (token_id)
GROUP BY feature_name
),
trait_rarities(name, ct, ln_p) AS (
SELECT name, ct, LN((ct::real) / (SELECT ct::real FROM total_tokens))
FROM trait_counts
ORDER BY ct ASC
),
token_rarities(token_id, sum_ln_p, why) AS (
SELECT token_id, sum_ln_p, why
FROM
project_tokens,
LATERAL (
SELECT
SUM(ln_p) AS sum_ln_p,
STRING_AGG(name || ' (' || ct || ')', ', ' ORDER BY ct ASC) AS why
FROM trait_rarities JOIN (
SELECT feature_name AS name
FROM token_features
WHERE token_id = project_tokens.token_id
) AS these_features
USING (name)
) AS lat
)
SELECT token_id, EXP(sum_ln_p) AS joint_p, why FROM token_rarities
ORDER BY sum_ln_p ASC
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment