Created
December 27, 2018 20:54
-
-
Save dankleiman/2e00b967697e22836a76ad0e75023a9e to your computer and use it in GitHub Desktop.
Word Challenge from https://www.reddit.com/r/SQL/comments/a9itx1/merry_christmas_have_a_sql_challenge_i_thought_of/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- First set up the relevant DB/tables | |
CREATE DATABASE words; | |
CREATE TABLE word_list (word text); | |
CREATE TABLE words (word text, len int, letters text); | |
-- Create a seed script for `word_list` and run the sql file into your db | |
-- cat /usr/share/dict/words | awk '{ print "INSERT INTO word_list VALUES('\''" $1 "'\'');" }' > seed.sql | |
-- psql words -f seed.sql | |
-- Now we need to build the `words` table from pg functions off the `word_list` table | |
-- First, let's do length | |
select word, length(word) from word_list limit 10; | |
word | length | |
----------+-------- | |
A | 1 | |
a | 1 | |
aa | 2 | |
aal | 3 | |
aalii | 5 | |
aam | 3 | |
Aani | 4 | |
aardvark | 8 | |
aardwolf | 8 | |
Aaron | 5 | |
(10 rows) | |
-- Now, sorting | |
-- if we can break each word down into an array, sort the array, and recombine the letters into a word, | |
-- we now have an ordered `letters` column for each work | |
-- step 1: break the words apart into letter arrays | |
select word, string_to_array(word, NULL) from word_list limit 10; | |
word | string_to_array | |
----------+------------------- | |
A | {A} | |
a | {a} | |
aa | {a,a} | |
aal | {a,a,l} | |
aalii | {a,a,l,i,i} | |
aam | {a,a,m} | |
Aani | {A,a,n,i} | |
aardvark | {a,a,r,d,v,a,r,k} | |
aardwolf | {a,a,r,d,w,o,l,f} | |
Aaron | {A,a,r,o,n} | |
(10 rows) | |
-- step 2: unnest the arrays so we can order the letters | |
SELECT | |
word, | |
lower(l.*) as letter | |
FROM | |
word_list, unnest(string_to_array(word, NULL)) l | |
WHERE word like 'a%' | |
ORDER BY word, letter | |
word | letter | |
-------------------------+-------- | |
a | a | |
aa | a | |
aa | a | |
aal | a | |
aal | a | |
aal | l | |
aalii | a | |
aalii | a | |
aalii | i | |
aalii | i | |
aalii | l | |
aam | a | |
aam | a | |
aam | m | |
-- step 3: using the ordered letters as a subquery, re-aggregate them into an array | |
SELECT | |
word, | |
array_agg(letter) | |
FROM ( | |
SELECT | |
word, | |
lower(l.*) as letter | |
FROM | |
word_list, unnest(string_to_array(word, NULL)) l | |
WHERE word like 'quack%' | |
ORDER BY word, letter | |
) w | |
GROUP BY word | |
word | array_agg | |
--------------+--------------------------- | |
quack | {a,c,k,q,u} | |
quackery | {a,c,e,k,q,r,u,y} | |
quackhood | {a,c,d,h,k,o,o,q,u} | |
quackish | {a,c,h,i,k,q,s,u} | |
quackishly | {a,c,h,i,k,l,q,s,u,y} | |
quackishness | {a,c,e,h,i,k,n,q,s,s,s,u} | |
quackism | {a,c,i,k,m,q,s,u} | |
quackle | {a,c,e,k,l,q,u} | |
quacksalver | {a,a,c,e,k,l,q,r,s,u,v} | |
quackster | {a,c,e,k,q,r,s,t,u} | |
quacky | {a,c,k,q,u,y} | |
(11 rows) | |
-- step 4: glue the words back together and `array_to_string` | |
SELECT | |
word, | |
array_to_string(array_agg(letter), '') as letters | |
FROM ( | |
SELECT | |
word, | |
lower(l.*) as letter | |
FROM | |
word_list, unnest(string_to_array(word, NULL)) l | |
WHERE word like 'quack%' | |
ORDER BY word, letter | |
) w | |
GROUP BY word | |
word | letters | |
--------------+-------------- | |
quack | ackqu | |
quackery | acekqruy | |
quackhood | acdhkooqu | |
quackish | achikqsu | |
quackishly | achiklqsuy | |
quackishness | acehiknqsssu | |
quackism | acikmqsu | |
quackle | aceklqu | |
quacksalver | aaceklqrsuv | |
quackster | acekqrstu | |
quacky | ackquy | |
(11 rows) | |
-- Now that we know we can generate each column in `words` from the `word_list` table, let's put them all back together | |
INSERT INTO words ( | |
SELECT | |
word, | |
length(word) as len, | |
array_to_string(array_agg(letter), '') as letters | |
FROM ( | |
SELECT | |
word, | |
lower(l.*) as letter | |
FROM | |
word_list, unnest(string_to_array(word, NULL)) l | |
ORDER BY word, letter | |
) w | |
GROUP BY word | |
); | |
words=# select * from words where word like 'quack%'; | |
word | len | letters | |
--------------+-----+-------------- | |
quack | 5 | ackqu | |
quackery | 8 | acekqruy | |
quackhood | 9 | acdhkooqu | |
quackish | 8 | achikqsu | |
quackishly | 10 | achiklqsuy | |
quackishness | 12 | acehiknqsssu | |
quackism | 8 | acikmqsu | |
quackle | 7 | aceklqu | |
quacksalver | 11 | aaceklqrsuv | |
quackster | 9 | acekqrstu | |
quacky | 6 | ackquy | |
(11 rows) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment