Last active
January 31, 2023 02:11
-
-
Save le717/b88648eed4bbeef2734639a8508ecfd9 to your computer and use it in GitHub Desktop.
#vss365 Wrapped 2022 (https://blog.codetri.net/vss365-wrapped-2022/)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET @year = 2022; | |
-- Get all hosts | |
SELECT | |
w.handle AS `Host`, | |
wd.date AS `Hosting period start date` | |
FROM writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
WHERE | |
YEAR(wd.date) = @year | |
ORDER BY | |
wd.date; | |
-- Get repeat hosts | |
SELECT | |
w.handle AS `Host`, | |
wd.date AS `Hosting period start date` | |
FROM | |
writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
WHERE | |
YEAR(wd.date) = @year | |
AND w.uid IN ( | |
SELECT w.uid | |
FROM writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
WHERE YEAR(wd.date) < @year | |
) | |
ORDER BY | |
wd.date; | |
-- Get new hosts | |
SELECT | |
w.handle AS `Host`, | |
wd.date AS `Hosting period start date` | |
FROM | |
writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
WHERE | |
YEAR(wd.date) = @year | |
AND w.uid NOT IN ( | |
SELECT w.uid | |
FROM writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
WHERE YEAR(wd.date) < @year | |
) | |
ORDER BY | |
wd.date; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET @year = 2022; | |
-- Get the longest, shortest, and total prompts | |
SELECT | |
@shortest_prompt := MIN(LENGTH(word)), | |
@longest_prompt := MAX(LENGTH(word)), | |
@total_prompts := COUNT(word) | |
FROM prompts | |
WHERE | |
YEAR(`date`) = @year; | |
-- Get total prompts with media | |
SELECT | |
@total_media_w_media := COUNT(*) AS `Prompts w/ media`, | |
(COUNT(*) / @total_prompts) * 100 AS `Pecent of total` | |
FROM writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
INNER JOIN prompts p | |
ON p.uid = w.uid | |
WHERE | |
YEAR(wd.date) = @year | |
AND YEAR(p.date) = @year | |
AND p.media IS NOT NULL; | |
-- Get total prompts with media but no alt text | |
SELECT | |
COUNT(*) AS `Prompts w/ media w/o alt text`, | |
(COUNT(*) / @total_media_w_media) * 100 AS `Pecent of total` | |
FROM writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
INNER JOIN prompts p | |
ON p.uid = w.uid | |
WHERE | |
YEAR(wd.date) = @year | |
AND YEAR(p.date) = @year | |
AND p.media IS NOT NULL | |
AND p.media_alt_text IS NULL; | |
-- Basic prompt lengths stats | |
SELECT | |
@shortest_prompt AS `Shortest prompt`, | |
@longest_prompt AS `Longest prompt`, | |
@total_prompts AS `Total prompts this year`, | |
AVG(LENGTH(word)) AS `Average prompt length` | |
FROM prompts | |
WHERE | |
YEAR(`date`) = @year; | |
-- Get the shortest and longest prompts | |
SELECT | |
w.handle AS `Host`, | |
p.date AS `Date`, | |
p.word AS `Prompt`, | |
LENGTH(p.word) AS `Length` | |
FROM prompts p | |
INNER JOIN writers w | |
ON w.uid = p.uid | |
WHERE | |
YEAR(`date`) = @year | |
AND LENGTH(word) IN (@shortest_prompt, @longest_prompt) | |
ORDER BY | |
p.date; | |
-- Prompts repeated | |
SELECT | |
word AS `Prompt`, | |
COUNT(LOWER(word)) AS `Times used` | |
FROM | |
prompts | |
WHERE | |
YEAR(`date`) = @year | |
GROUP BY | |
LOWER(word) | |
HAVING | |
COUNT(LOWER(word)) > 1 | |
ORDER BY | |
COUNT(LOWER(word)), | |
word; | |
-- Prompt lengths and times they occurred | |
SELECT | |
LENGTH(word) AS `Length of prompt`, | |
COUNT(*) AS `Number of prompts` | |
FROM | |
prompts | |
WHERE | |
YEAR(`date`) = @year | |
GROUP BY | |
LENGTH(LOWER(word)) | |
ORDER BY | |
COUNT(*) DESC, | |
LENGTH(word) desc; | |
-- Average length of prompt for each host | |
SELECT | |
w.handle AS `Host`, | |
AVG(LENGTH(p.word)) AS `Average length of prompt` | |
FROM writers w | |
INNER JOIN writer_dates wd | |
ON w.uid = wd.uid | |
INNER JOIN prompts p | |
ON p.uid = w.uid | |
WHERE | |
YEAR(wd.date) = @year | |
AND YEAR(p.date) = @year | |
GROUP BY | |
w.handle | |
ORDER BY | |
AVG(LENGTH(p.word)) DESC; | |
-- Get all prompts used for the year | |
SELECT LOWER(word) AS `Prompt` | |
FROM prompts | |
WHERE YEAR(`date`) = @year | |
ORDER BY `date`; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET @year = 2022; | |
-- Get total new registations and average per month | |
SELECT | |
COUNT(*) AS `Total`, | |
(COUNT(*) / 12) AS `Average` | |
FROM emails | |
WHERE | |
YEAR(date_added) = @year; | |
-- Monthly breakdown | |
SELECT | |
MONTHNAME(date_added) AS `Month`, | |
COUNT(*) AS `New registations` | |
FROM emails | |
WHERE | |
YEAR(date_added) = @year | |
GROUP BY | |
MONTH(date_added); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from collections import Counter | |
from pathlib import Path | |
# Get the word list | |
word_list = json.loads(Path("2022-word-list.json").read_text(encoding="utf-8")) | |
word_list = word_list["words"] | |
# Set up the counters | |
letters_used = Counter() | |
numbers_used = Counter() | |
special_characters_used = Counter() | |
starting_letters = Counter([word[0] for word in word_list]) | |
ending_letters = Counter([word[-1] for word in word_list]) | |
# We have to split the words into individual letters | |
for word in word_list: | |
ind_letters = list(word) | |
# Identify any numbers used | |
numbers = [c for c in ind_letters if c.isdigit()] | |
if numbers: | |
numbers_used.update(numbers) | |
# Identify any special characters used | |
special_chars = [a for a in ind_letters if (not a.isalpha() and not a.isdigit())] | |
if special_chars: | |
special_characters_used.update(special_chars) | |
ascii_letters = [b for b in ind_letters if b.isalpha()] | |
letters_used.update(ascii_letters) | |
# Detemine which letters were used once | |
letters_used_once = [leuo1 for leuo1 in letters_used if letters_used[leuo1] == 1] | |
# Detemine which letters were the least but at least once. | |
# We need to initalize with the lowest used letter and check from there | |
# if there are addtional letters used that little | |
letters_used_at_least_once = [ | |
(item[0], item[1]) for item in letters_used.most_common() if item[1] > 1 | |
] | |
lowest_count = letters_used_at_least_once[-1][1] | |
letters_used_least = [] | |
for leuo1, count in reversed(letters_used_at_least_once): | |
if count == lowest_count: | |
letters_used_least.append(leuo1) | |
# Find the most common letter used | |
highest_count = letters_used_at_least_once[0][1] | |
letters_most_used = [] | |
for lemu1, count in letters_used_at_least_once: | |
if count == highest_count: | |
letters_most_used.append(lemu1) | |
# Find the most common starting letters | |
highest_starting_letter_count = starting_letters.most_common()[0][1] | |
starting_letters_most_used = [] | |
for slemu1 in starting_letters.most_common(): | |
if slemu1[1] == highest_starting_letter_count: | |
starting_letters_most_used.append(slemu1[0]) | |
# Find the least common starting letters | |
starting_letters_used_at_least_once = [ | |
(item[0], item[1]) for item in starting_letters.most_common() if item[1] > 1 | |
] | |
starting_lowest_count = starting_letters_used_at_least_once[-1][1] | |
starting_letters_used_least = [] | |
for slull1, count in reversed(starting_letters_used_at_least_once): | |
if count == starting_lowest_count: | |
starting_letters_used_least.append(slull1) | |
# Find the most common ending letters | |
highest_ending_letter_count = ending_letters.most_common()[0][1] | |
ending_letters_most_used = [] | |
for elemu1 in ending_letters.most_common(): | |
if elemu1[1] == highest_ending_letter_count: | |
ending_letters_most_used.append(elemu1[0]) | |
# Find the least common ending letters | |
ending_letters_used_at_least_once = [ | |
(item[0], item[1]) for item in ending_letters.most_common() if item[1] > 1 | |
] | |
ending_lowest_count = ending_letters_used_at_least_once[-1][1] | |
ending_letters_used_least = [] | |
for elull1, count in reversed(ending_letters_used_at_least_once): | |
if count == ending_lowest_count: | |
ending_letters_used_least.append(elull1) | |
# Display the information | |
print(f"Number of letters used: {len(letters_used)}") | |
print("Most common letters:") | |
for lemu2 in letters_most_used: | |
print(f"{lemu2}: {letters_used[lemu2]}") | |
if letters_used_once: | |
print("Letters used once:") | |
for leuo2 in letters_used_once: | |
print(f"{leuo2}: {letters_used[leuo2]}") | |
print("Least common letters (but at least once):") | |
for leul2 in letters_used_least: | |
print(f"{leul2}: {letters_used[leul2]}") | |
print("Most common starting letters:") | |
for slemu2 in starting_letters_most_used: | |
print(f"{slemu2}: {starting_letters[slemu2]}") | |
if starting_letters_used_least: | |
print("Least common starting letters:") | |
for slull2 in starting_letters_used_least: | |
print(f"{slull2}: {starting_letters[slull2]}") | |
print("Most common ending letters:") | |
for slemu2 in ending_letters_most_used: | |
print(f"{slemu2}: {ending_letters[slemu2]}") | |
if starting_letters_used_least: | |
print("Least common ending letters:") | |
for elull2 in ending_letters_used_least: | |
print(f"{elull2}: {ending_letters[elull2]}") | |
if numbers_used: | |
print("Numbers used:") | |
for num in numbers_used.most_common(): | |
print(f"{num[0]}: {num[1]}") | |
if special_characters_used: | |
print("Special characters used:") | |
for spec in special_characters_used.most_common(): | |
print(f"{spec[0]}: {spec[1]}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment